#
# docker build -t pyspark .
# docker run -it pyspark /opt/spark/bin/pyspark
#
# Base image: Spark 3.5.0 built for Scala 2.12 and Java 11 on Ubuntu (per the tag).
FROM spark:3.5.0-scala2.12-java11-ubuntu

# Switch to root so the package installation steps below are permitted.
USER root

# Python 3 and pip. The apt package index is refreshed, used, and deleted
# within this single layer so it does not add to the image size.
RUN set -ex \
    && apt-get update \
    && apt-get install -y \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Install the OpenSSH server. The index refresh, the install, and the index
# cleanup share one layer so the package lists are not left in the image.
#
# NOTE: this replaces the former `RUN service ssh start`. A daemon started
# during the build only lives in the temporary build container; it is NOT
# running in containers created from the image. What is kept from that step is
# the /run/sshd privilege-separation directory that sshd needs at startup.
# If SSH access is required, sshd must be launched (as root) when the
# container starts, e.g. from an entrypoint script.
RUN set -ex; \
    apt-get update; \
    apt-get install -y openssh-server; \
    mkdir -p /run/sshd; \
    rm -rf /var/lib/apt/lists/*

# Python libraries from PyPI: data handling (pandas, pyarrow), gRPC support
# (grpcio, grpcio-status, googleapis-common-protos -- presumably for Spark
# Connect; confirm), plotting (plotly, matplotlib) and progress bars (tqdm).
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir \
        googleapis-common-protos \
        grpcio \
        grpcio-status \
        matplotlib \
        pandas \
        plotly \
        pyarrow \
        tqdm

# CPU-only PyTorch builds. Kept as a separate step because --index-url
# replaces the default package index with the PyTorch CPU wheel index.
RUN pip3 install --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cpu \
        torch \
        torchaudio \
        torchvision


# Drop root privileges: containers run as the `spark` user (not created in
# this file -- presumably provided by the base image).
USER spark
